/* Archive listing header (not part of the original source file):

Cream of the Crop 25

home *** CD-ROM | disk | FTP | other *** search

/ Cream of the Crop 25 / Cream of the Crop 25.iso / os2 / gnuwget.zip / wget-1.4.3 / src / utils.c < prev next >

Wrap

C/C++ Source or Header | 1997-02-14 | 19KB | 888 lines
*/

/* Various functions of utilitarian nature. Copyright (C) 1995, 1996, 1997 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #ifdef HAVE_CONFIG_H # include <config.h> #endif /* HAVE_CONFIG_H */ #include <stdio.h> #include <stdlib.h> #ifdef HAVE_STRING_H # include <string.h> #else # include <strings.h> #endif /* HAVE_STRING_H */ #include <ctype.h> #ifdef HAVE_PWD_H # include <pwd.h> #endif #ifdef WINDOWS # include <direct.h> # define mkdir(a, b) _mkdir(a) /* bletch! */ #endif /* WINDOWS */ #include <sys/types.h> #include <sys/stat.h> #ifdef HAVE_UNISTD_H # include <unistd.h> #endif #include <assert.h> #include <limits.h> #ifdef HAVE_UTIME_H # include <utime.h> #endif #ifdef HAVE_SYS_UTIME_H # include <sys/utime.h> #endif #include <errno.h> #include "wget.h" #include "utils.h" #include "options.h" #include "mtch.h" #ifndef errno extern int errno; #endif extern struct options opt; /* nmalloc, nrealloc and nstrdup exit the program if there is not enough memory. nstrdup also implements strdup on systems that do not have it. */ void * nmalloc(size_t size) { void *res; res = malloc(size); if (!res) memfatal("malloc"); return res; } void * nrealloc(void *obj, size_t size) { void *res; /* Not all Unixes have the feature of realloc() that calling it with a NULL-pointer is the same as malloc(), but it is easy to simulate. 
*/ if (obj) res = realloc(obj, size); else res = malloc(size); if (!res) memfatal("realloc"); return res; } char * nstrdup(const char *s) { char *s1; #ifndef HAVE_STRDUP int l; l = strlen(s); s1 = malloc(l + 1); if (!s1) memfatal("strdup"); memcpy(s1, s, l + 1); return s1; #else s1 = strdup(s); if (!s1) memfatal("strdup"); return s1; #endif } /* Croak the fatal memory error and bail out with non-zero exit status. */ void memfatal(const char *s) { fprintf(opt.lfile, "%s: Not enough memory.\n", s); exit(1); } /* Copy the string formed by two pointers (one on the beginning, other on the char after the last char) to a new, malloc-ed location. 0-terminate it. */ char * strdupdelim(const char *beg, const char *end) { char *res; res = (char *)nmalloc(end - beg + 1); memcpy(res, beg, end - beg); res[end - beg] = '\0'; return res; } /* Returns an error message for the error ERRNUM. Requires more work. */ const char * uerrmsg(uerr_t errnum) { switch (errnum) { case URLUNKNOWN: return "Unknown/unsupported protocol"; break; case URLBADPORT: return "Invalid port specification"; break; case URLBADHOST: return "Invalid host name"; break; default: assert(0); } } /* Parse a string containing comma-separated elements, and return a vector of char pointers with the elements. Spaces following the commas are ignored. */ char ** sepstring(const char *s) { char **res; const char *p; int i; if (!s || !*s) return NULL; res = NULL; p = s; i = 0; while (*s) { if (*s == ',') { res = (char **)nrealloc(res, (i + 2) * sizeof(char *)); res[i] = strdupdelim(p, s); res[++i] = NULL; ++s; /* Skip the blanks following the ','. */ while (isspace(*s)) ++s; p = s; } else ++s; } res = (char **)nrealloc(res, (i + 2) * sizeof(char *)); res[i] = strdupdelim(p, s); res[++i] = NULL; return res; } /* Compare s1 and s2 frontally; s1 must be a subset of s2. E.g. if s1 is `/something', s2 must begin with `/something' to make frontcmp return 1. Otherwise, frontcmp will return 0. 
*/ int frontcmp(const char *s1, const char *s2) { for (; *s1 && *s2 && (*s1 == *s2); ++s1, ++s2); return !*s1; } /* A cuserid() immitation using getpwuid(), to avoid hassling with utmp. Besides, not all systems have cuerid(). */ char * mycuserid(char *where) { #ifdef WINDOWS if (where) return nstrdup(""); else return NULL; #else /* not WINDOWS */ struct passwd *pwd; if (!(pwd = getpwuid(getuid())) || !pwd->pw_name) return NULL; if (where) { strcpy(where, pwd->pw_name); return where; } else return pwd->pw_name; #endif /* not WINDOWS */ } /* Canonicalize PATH, and return a new path. The new path differs from PATH in that: Multple `/'s are collapsed to a single `/'. Leading `./'s and trailing `/.'s are removed. Trailing `/'s are removed. Non-leading `../'s and trailing `..'s are handled by removing portions of the path. E.g. "a/b/c/./../d/.." will yield "a/b". Changes by hniksic: Always use '/' as stub_char. Don't check for local things using canon_stat. Change the original string instead of strdup-ing. React correctly when beginning with `./' and `../'. */ void path_simplify(char *path) { register int i, start, ddot; char stub_char; if (!*path) return; /*stub_char = (*path == '/') ? '/' : '.';*/ stub_char = '/'; /* Addition: Remove all `./'-s preceding the string. If `../'-s precede, put `/' in front and remove them too. */ i = 0; ddot = 0; while (1) { if (path[i] == '.' && path[i + 1] == '/') i += 2; else if (path[i] == '.' && path[i + 1] == '.' && path[i + 2] == '/') { i += 3; ddot = 1; } else break; } if (i) strcpy(path, path + i - ddot); /* Replace single `.' or `..' with `/'. */ if ((path[0] == '.' && path[1] == '\0') || (path[0] == '.' && path[1] == '.' && path[2] == '\0')) { path[0] = stub_char; path[1] = '\0'; return; } /* Walk along PATH looking for things to compact. */ i = 0; while (1) { if (!path[i]) break; while (path[i] && path[i] != '/') i++; start = i++; /* If we didn't find any slashes, then there is nothing left to do. 
*/ if (!path[start]) break; /* Handle multiple `/'s in a row. */ while (path[i] == '/') i++; if ((start + 1) != i) { strcpy (path + start + 1, path + i); i = start + 1; } /* Check for trailing `/'. */ if (start && !path[i]) { zero_last: path[--i] = '\0'; break; } /* Check for `../', `./' or trailing `.' by itself. */ if (path[i] == '.') { /* Handle trailing `.' by itself. */ if (!path[i + 1]) goto zero_last; /* Handle `./'. */ if (path[i + 1] == '/') { strcpy (path + i, path + i + 1); i = (start < 0) ? 0 : start; continue; } /* Handle `../' or trailing `..' by itself. */ if (path[i + 1] == '.' && (path[i + 2] == '/' || !path[i + 2])) { while (--start > -1 && path[start] != '/'); strcpy (path + start + 1, path + i + 2); i = (start < 0) ? 0 : start; continue; } } /* path == '.' */ } /* while */ if (!*path) { *path = stub_char; path[1] = '\0'; } } /* "Touch" FILE, i.e. make its atime and mtime equal to the time specified with TM. */ void my_touch(char *file, time_t tm) { #ifdef HAVE_STRUCT_UTIMBUF struct utimbuf times; times.actime = times.modtime = tm; #else time_t times[2]; times[0] = times[1] = tm; #endif if (utime(file, ×) == -1) { if (!opt.quiet) fprintf(opt.lfile, "utime: %s\n", mystrerror(errno)); } } /* Checks if a file is a symbolic link, and removes it if it is. Does nothing under MS-Windows. */ int remove_link(const char *file) { int err = 0; struct stat st; #ifndef __EMX__ #ifndef WINDOWS if (lstat(file, &st) == 0 && S_ISLNK(st.st_mode)) { #ifdef DEBUG if (opt.debug) fprintf(opt.lfile, "Unlinking %s (symlink).\n", file); #endif err = unlink(file); if (err != 0) if (opt.verbose) fprintf(opt.lfile, "Failed to unlink symlink `%s': %s\n", file, mystrerror(errno)); } #endif /* WINDOWS */ #endif /* __EMX__ */ return err; } /* Does a file exist? This is quite a lousy implementation, since it supplies no error codes -- only a yes-or-no answer. Thus it will return that a file does not exist if, e.g., the directory is unreadable. 
I don't mind it too much currently, though. The proper way should, of course, be to have a third, error state, other than true/false, but that would make the calling functions much more complex. */ int exists(const char *filename) { struct stat buf; return stat(filename, &buf) ? 0 : 1; } /* Returns 0 if the path is a directory, 1 otherwise. Returns 0 on error. */ int isfile(const char *path) { struct stat buf; if (stat(path, &buf) != 0) return 0; return S_ISDIR(buf.st_mode) ? 0 : 1; } /* The function that takes the dirname to be created, making sure that missing directories are made one by one. Its behaviour should be similar to mkdir -p on systems that support it. */ int mymkdir(const char *d) { int i, status, quit; char *dir; struct stat stbuf; /* Make a copy of dir, to be able to write to it. Otherwise, the function is unsafe if called with a read-only char *argument. */ dir = nstrdup(d); /* If the first character of dir is '/', skip it (and thus enable creation of absolute-pathname directories. */ quit = 0; for (i = (*dir == '/'); 1; ++i) { for (; dir[i] && dir[i] != '/'; i++) ; if (!dir[i]) quit = 1; dir[i] = '\0'; /* Check whether the directory already exists. */ status = stat(dir, &stbuf); if (status != 0) { if (mkdir(dir, opt.dirmode) < 0) { free(dir); return -1; } } if (quit) break; else dir[i] = '/'; } /* for */ free(dir); return 0; } /* Determine whether a file is acceptable to be followed, according to lists of patterns to accept/reject. */ int acceptable(const char *s) { int l = strlen(s); while (l && s[l] != '/') --l; if (s[l] == '/') s += (l + 1); if (opt.accepts) { if (opt.rejects) return (in_acclist((const char **)opt.accepts, s, 1) && !in_acclist((const char **)opt.rejects, s, 1)); else return in_acclist((const char **)opt.accepts, s, 1); } else if (opt.rejects) return !in_acclist((const char **) opt.rejects, s, 1); return 1; } /* Returns whether a directory is acceptable for download, wrt include/exclude lists. 
If the argument flags is ALLABS, the leading '/' is ignored in paths; relative and absolute paths may be freely intermixed. */ int accdir(const char *s, enum accd flags) { char **x, *p; /* Remove starting '/'. */ if (flags & ALLABS && *s == '/') ++s; if (opt.includes) { for (x = opt.includes; *x; x++) { p = *x + (flags & ALLABS && **x == '/'); /* Remove '/' */ if (frontcmp(p, s)) break; } if (!*x) return 0; } if (opt.excludes) { for (x = opt.excludes; *x; x++) { p = *x + (flags & ALLABS && **x == '/'); /* Remove '/' */ if (frontcmp(p, s)) break; } if (*x) return 0; } return 1; } /* Match a string against a pattern, backwards. E.g.: match_backwards("abc", "bc") -> 1 match_backwards("abc", "ab") -> 0 match_backwards("abc", "abc") -> 1 */ int match_backwards(const char *string, const char *pattern) { int i, j; for (i = strlen(string), j = strlen(pattern); i >= 0 && j >= 0; i--, j--) if (string[i] != pattern[j]) break; /* If the pattern was exhausted, the match was succesful. */ if (j == -1) return 1; else return 0; } /* Does a URL match each element of a list. List elements are matched with fnmatch() or match_backwards(), according to whether the pattern (or suffix) contains globbing characters. If the argument backward is unset, don't do backward comparison -- just compare them normally. */ int in_acclist(const char **accepts, const char *s, int backward) { for (; *accepts; accepts++) { if (has_wildcards(*accepts)) { /* fnmatch returns 0 if the pattern *does* match the string. */ if (fnmatch(*accepts, s, 0) == 0) return 1; } else { if (backward) { if (match_backwards(s, *accepts)) return 1; } else { if (!strcmp(s, *accepts)) return 1; } } } return 0; } /* Return the malloc-ed suffix of a filename */ char * suffix(const char *s) { int i; for (i = strlen(s); i && s[i] != '/' && s[i] != '.'; i--); if (s[i++] == '.') return nstrdup(s + i); else return NULL; } /* The function reads a whole line. 
It reads the line realloc-ing the storage exponentially, doubling the storage after each overflow to minimize the number of calls to realloc(). It is not an exemplary of correctness, since it kills off the newline (and no, there is no way to know if there was a newline at EOF). */ char * read_whole_line(FILE *fp) { char *line; int i, bufsize, c; i = 0; bufsize = DYNAMIC_LINE_BUFFER; line = nmalloc(bufsize); /* Construct the line. */ while ((c = getc(fp)) != EOF && c != '\n') { if (i > bufsize - 1) line = (char *)nrealloc(line, (bufsize <<= 1)); line[i++] = c; } if (c == EOF && !i) { free(line); return NULL; } /* Check for overflow at zero-termination (no need to double the buffer in this case. */ if (i == bufsize) line = (char *)nrealloc(line, i + 1); line[i] = '\0'; return line; } /* Load file to memory, return the malloc-ed buffer, and the file size. The file is loaded in chunks, each one double the size of the previous one. The first chunk is FILE_BUFFER_SIZE bytes long. */ void load_file(FILE *fp, char **buf, long *nread) { long bufsize; bufsize = FILE_BUFFER_SIZE; *nread = 0; *buf = NULL; while (!feof(fp)) { *buf = (char *)nrealloc(*buf, bufsize + *nread); *nread += fread(*buf + *nread, sizeof(char), bufsize, fp); bufsize <<= 1; } } /* Free the pointers in a NULL-terminated vector of pointers, then free the pointer itself. */ void free_vec(char **vec) { int i; if (!vec) return; for (i = 0; vec[i]; i++) free(vec[i]); free(vec); } /* Merge the two vectors (v1 will be placed before of v2). The function effectively frees the vectors v1 and v2 (their contents must not be reused after the call). If v1 is NULL, the function returns v2. */ char ** merge_vecs(char **v1, char **v2) { int i, j; if (!v1) return v2; if (!v2) return v1; if (!*v2) { /* To avoid j == 0 */ free(v2); return v1; } /* Count v1. */ for (i = 0; v1[i]; i++); /* Count v2. */ for (j = 0; v2[j]; j++); /* Reallocate v1. 
*/ v1 = (char **)nrealloc(v1, (i + j + 1) * sizeof(char **)); memcpy(v1 + i, v2, (j + 1) * sizeof(char *)); free(v2); return v1; } /* A set of simple-minded routines to store and search for strings in a linked list. You may add a string to the slist, and peek whether it's still in there at any time later. */ /* Add an element to the list. If flags is NOSORT, the list will not be sorted. */ slist * add_slist(slist *l, const char *s, int flags) { slist *t, *old, *beg; int cmp; if (flags & NOSORT) { if (!l) { t = (slist *)nmalloc(sizeof(slist)); t->string = nstrdup(s); t->next = NULL; return t; } beg = l; /* Find the last element. */ while (l->next) l = l->next; t = (slist *)nmalloc(sizeof(slist)); l->next = t; t->string = nstrdup(s); t->next = NULL; return beg; } /* Empty list or changing the first element. */ if (!l || (cmp = strcmp(l->string, s)) > 0) { t = (slist *)nmalloc(sizeof(slist)); t->string = nstrdup(s); t->next = l; return t; } beg = l; if (cmp == 0) return beg; /* Second two one-before-the-last element. */ while (l->next) { old = l; l = l->next; cmp = strcmp(s, l->string); if (cmp == 0) /* No repeating in the list. */ return beg; else if (cmp > 0) continue; /* If the next list element is greater than s, put s between the current and the next list element. */ t = (slist *)nmalloc(sizeof(slist)); old->next = t; t->next = l; t->string = nstrdup(s); return beg; } t = (slist *)nmalloc(sizeof(slist)); t->string = nstrdup(s); /* Insert the new element after the last element. */ l->next = t; t->next = NULL; return beg; } /* Is there a specific entry in the list? */ int in_slist(slist *l, const char *s) { int cmp; while (l) { cmp = strcmp(l->string, s); if (cmp == 0) return 1; else if (cmp > 0) /* The list is ordered! */ return 0; l = l->next; } return 0; } /* Free the whole slist. */ void free_slist(slist *l) { slist *n; while (l) { n = l->next; free(l->string); free(l); l = n; } } /* Legible -- return a static pointer to the legibly printed long. 
*/
/* The digits of L are grouped in threes, counted from the right, with
   LEGIBLE_SEPARATOR between the groups; a leading '-' is kept for
   negative values.  The result lives in a static buffer that is
   overwritten by the next call.  */
char *
legible(long l)
{
  /* A long has at most sizeof(long) * CHAR_BIT / 3 + 1 decimal
     digits, since each decimal digit carries more than three bits.
     The buffers are sized from that bound rather than a fixed 20
     bytes, which is too small for a 64-bit long.  */
  enum { MAX_DIGITS = sizeof(long) * CHAR_BIT / 3 + 1 };
  /* Sign + digits + one separator per three digits + terminator.  */
  static char buf[MAX_DIGITS + MAX_DIGITS / 3 + 2];
  /* Sign + digits + terminator.  */
  char inbuf[MAX_DIGITS + 2];
  int i, i1, mod;
  char *ptr, *in;

  /* Fill the buffer.  */
  prnum(inbuf, l);
  /* Reset the pointers.  */
  ptr = buf;
  in = inbuf;
  /* If the number is negative, shift the pointers.  */
  if (*in == '-')
    {
      *ptr++ = '-';
      ++in;
    }
  /* How many digits before the first separator?  */
  mod = strlen(in) % 3;
  /* Insert them.  */
  for (i = 0; i < mod; i++)
    *ptr++ = in[i];
  /* Now insert the rest of them, putting separator before every
     third digit.  */
  for (i1 = i, i = 0; in[i1]; i++, i1++)
    {
      if (i % 3 == 0 && i1 != 0)
	*ptr++ = LEGIBLE_SEPARATOR;
      *ptr++ = in[i1];
    }
  /* Zero-terminate the string.  */
  *ptr = '\0';
  return buf;
}

/* How many digits in a (long) integer?  A minus sign is not counted,
   and 0 has one digit.  */
int
numdigit(long a)
{
  int res;

  for (res = 1; a /= 10; res++)
    ;
  return res;
}

/* Print a long integer to the string buffer.  The digits are first
   written in reverse order (the least significant digit first), and
   are then reversed.

   WHERE must have room for the sign, all the digits and the
   terminating 0; no bounds are checked.  The magnitude is computed in
   unsigned arithmetic because negating LONG_MIN as a long would
   overflow.  */
void
prnum(char *where, long num)
{
  unsigned long mag;
  char *lo, *hi;
  char c;

  if (num < 0)
    {
      *where++ = '-';
      /* Unsigned negation is well defined for every value.  */
      mag = 0UL - (unsigned long)num;
    }
  else
    mag = (unsigned long)num;

  /* Print the digits to the string.  */
  hi = where;
  do
    {
      *hi++ = (char)('0' + mag % 10);
      mag /= 10;
    }
  while (mag);
  *hi = '\0';

  /* And reverse them.  */
  for (lo = where, --hi; lo < hi; ++lo, --hi)
    {
      c = *lo;
      *lo = *hi;
      *hi = c;
    }
}